############################################################
# Data Descriptives for Paper: "Measuring Corruption: Evidence from a World Bank Project in Kenya"
# November, 2023
############################################################

### Load Data
# Name of the raw data file; it is a bare filename, so it is looked up
# relative to the working directory set below.
fp <- "wbdata.csv"

# Make the project code directory the working directory.
localpath <- "/Users/jetson/Dropbox/Research/Digit Analysis/Code"
setwd(localpath)

# Pre-clean data: group training, travel, vehicles
D <- read.csv(fp, stringsAsFactors = FALSE)

# SectorGroup starts as a copy of SECTOR and is then collapsed into coarser
# groups; the original SECTOR column is left untouched.
D$SectorGroup <- D$SECTOR
unique(D$SECTOR)
D$SectorGroup[D$SectorGroup %in% c("TRN", "TRAVEL", "VEHICLES")] <- "TRN_TRV_VEH"

# Fix single stray line (a row whose YEAR field holds the text "Turkana").
# %in% is used instead of `!=` because `!=` returns NA for a missing YEAR,
# and indexing a data frame with NA replaces that row with an all-NA row.
# With %in%, rows that have a missing YEAR are kept unchanged.
D <- D[!(D$YEAR %in% "Turkana"), ]

# Make collapsed data for sector plot
D$SectorGroup[D$SectorGroup %in% c("GE", "CW")] <- "GE_CW"


### Describe Expenditures
# Keep only the rows that have a non-missing expenditure value.
ALEXP <- D[!is.na(D$ALEXP.Values), ]


# Expenditure Obs per District
obs_per_dist <- table(ALEXP$DIST)
obs_per_dist
summary(as.numeric(obs_per_dist))
sd(as.numeric(obs_per_dist))

# Expenditure Obs per Year
obs_per_year <- table(ALEXP$YEAR)
obs_per_year
summary(as.numeric(obs_per_year))

# Expenditure Obs per Sector Group
# The grouped-sector column is named `SectorGroup`; `$` is case-sensitive,
# so `ALEXP$SECTORGROUP` returns NULL and tabulates nothing.
obs_per_sector <- table(ALEXP$SectorGroup)
obs_per_sector
summary(as.numeric(obs_per_sector))



# ALEXP is a data frame; hist() and boxplot() need the expenditure amounts
# as a numeric vector, so extract that column explicitly.
# as.numeric() leaves an already-numeric column unchanged.
# NOTE(review): confirm ALEXP.Values is read as numeric by read.csv().
alexp_values <- as.numeric(ALEXP$ALEXP.Values)

summary(alexp_values)
# Recorded output from an earlier run (presumably of the summary above --
# TODO confirm):
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#   1508   56000  123984  267925  278000 9000000 

nrow(ALEXP)

# Full distribution, then the distribution restricted to values below 1M.
# which() drops NA comparisons so no NA entries reach hist().
hist(alexp_values, xlab = "Arid Lands Expenditures")
hist(alexp_values[which(alexp_values < 1000000)], xlab = "Arid Land Expenditures (Up to 1M)")

boxplot(alexp_values)
library(ggplot2)

### Describe BENM/BENF
# Work on the three beneficiary count columns only.
BEN <- D[c("BENM", "BENF", "BENTOT")]

# Coerce every column to numeric; entries that cannot be parsed become NA.
BEN[] <- lapply(BEN, as.numeric)

# Drop rows in which all three counts are missing.
BEN <- BEN[rowSums(!is.na(BEN)) > 0, ]

# For each column: distribution summary, then the number of non-missing values.
summary(BEN$BENM)
sum(!is.na(BEN$BENM))

summary(BEN$BENF)
sum(!is.na(BEN$BENF))

summary(BEN$BENTOT)
sum(!is.na(BEN$BENTOT))

# Rows with at least one beneficiary figure.
nrow(BEN)
	#5499



